"""
    案例：编程题3，三国演义
    日期：2022-08
"""
import jieba

# 1. Read the file
# The context manager closes the file handle as soon as the text is loaded,
# instead of leaving it open until the interpreter collects it.
with open("../三国演义.txt", "r", encoding="utf-8") as novel_file:
    txt = novel_file.read()

# 2. Segment the text into words with jieba
words = jieba.cut(txt)

# 3. Count the words
# Words in this set are never counted.
excludes = {
    "将军", "却说", "丞相", "二人", "不可", "荆州", "不能", "如此", "商议", "如何",
    "主公", "军士", "左右", "军马", "引兵", "次日", "大喜", "天下", "于是", "今日",
    "不敢", "魏兵", "陛下", "人马", "都督", "一人", "不知", "东吴", "汉中", "众将",
    "只见", "蜀兵", "大叫", "上马", "此人", "先主", "太守", "天子", "后人", "背后",
}
# Alternative spellings that are tallied under a single canonical name.
aliases = {
    "皇叔": "刘备",
    "玄德": "刘备",
    "玄德曰": "刘备",
    "关公": "关羽",
    "云长": "关羽",
    "翼德": "张飞",
    "孟德": "曹操",
    "孔明": "诸葛亮",
    "卧龙先生": "诸葛亮",
    "孔明曰": "诸葛亮",
}
counts = {}
for token in words:
    # Only words of 2 to 4 characters that are not excluded are counted.
    if 2 <= len(token) <= 4 and token not in excludes:
        name = aliases.get(token, token)
        counts[name] = counts.get(name, 0) + 1

# 4. Order the (word, count) pairs by count, highest first
items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)

# 5. Print the results
# Slicing yields at most ten rows and, unlike indexing items[0]..items[9],
# does not raise IndexError when fewer than ten distinct words were counted.
for c, count in items[:10]:
    print("{:<10} ==> {:>5}".format(c, count))
